# Kalmia analyze fruit size at end of season # Using fruit sizes calculated from image segmentation in Python with opencv # Callin Switzer # 20 October 2016 # 10 Feb 2017 Update: Conducted Statistical Modeling with LMER and GLMER # 11 March 2017 Update: added random effect to MER models to account for plant lineage # 24 April 2017 Update: changed figure labels
ipak <- function(pkg){
new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if(length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}
packages <- c("ggplot2", 'lme4', 'plyr', 'influence.ME', 'sjPlot', 'multcomp', 'lsmeans', 'Matrix')
ipak(packages)
## ggplot2 lme4 plyr influence.ME sjPlot
## TRUE TRUE TRUE TRUE TRUE
## multcomp lsmeans Matrix
## TRUE TRUE TRUE
setwd("/Users/callinswitzer/Dropbox/ExperSummer2016/Kalmia/Manuscript/DatasetsSupplemental/")
theme_set(theme_classic())
kfrt <- read.csv("kalmiaFruitFinal.csv", stringsAsFactors = FALSE)
nrow(kfrt[kfrt$dia_mm == 20.0,]) # number of images taken
## [1] 92
# clean and process data
kfrt <- kfrt[kfrt$dia_mm != 20.0, ] # 20 is the reference object in segmented images
nrow(kfrt) # number of total fruits measured
## [1] 1305
# label treatmens and access numbers
kfrt$trt <- sapply(X = 1:nrow(kfrt), FUN = function(x) strsplit(kfrt$plantNum[x], split = "__")[[1]][2])
kfrt$accessNum <- sapply(X = 1:nrow(kfrt), FUN = function(x) strsplit(kfrt$plantNum[x], split = "__")[[1]][1])
kfrt$trt <- mapvalues(kfrt$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Unbagged",
"Unbagged & Outcrossed", "Unbagged_2"))
# add plant lineage (all plants that start with 1129 are from the lineage, 673_69 )
plantInds <- 1:nrow(kfrt) %in% grep(pattern = "1129", x = kfrt$plantNum)
kfrt$plantLineage <- sapply(1:length(plantInds), FUN = function(x){
ifelse(plantInds[x], "673_69", paste(strsplit(kfrt$plantNum[x], split = "_")[[1]][1:2], collapse = "_") )})
# count number of fruits
counts = as.data.frame(table(kfrt$plantNum))
counts$trt = sapply(X = 1:nrow(counts), FUN = function(x) strsplit(as.character(counts$Var1[x]), split = "__")[[1]][2])
counts$trt <- mapvalues(counts$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Unbagged",
"Unbagged & Outcrossed", "Unbagged_2"))
counts$accessNum = sapply(X = 1:nrow(counts), FUN = function(x) strsplit(as.character(counts$Var1[x]), split = "__")[[1]][1])
# add in the trts and accession numbers that had counts of 0
# read in error-checked datasheet
kalNotes <- read.csv("KalmiaDailyDatasheets_ErrorChecked.csv")
# get the accession numbers I should have
accNumsHave <- unique(kalNotes$Plant.Number)
accNumsHave <- accNumsHave[!(accNumsHave %in% c("", "Plant Number"))]
#change formatting to match above
accNumsHave <- gsub("#", "", accNumsHave)
accNumsHave <- gsub("\ ", "_", accNumsHave)
accNumsHave <- gsub("\\-", "_", accNumsHave)
accNumsHave <- gsub("\\*", "_", accNumsHave)
accNumsHave <-toupper(accNumsHave)
shouldHave <- as.data.frame(t(sapply(accNumsHave, function(x) as.character(paste(x, c(1:4), sep = "__")))))
row.names(shouldHave) <- NULL
shouldHave1 <- c(as.character(shouldHave[, 1]),
as.character(shouldHave[, 2]),
as.character(shouldHave[, 3]),
as.character(shouldHave[, 4]))
# find missing ones -- this is the ones that
# are in the daily datasheet that aren't in the fruit measurements
missingTrts <- setdiff(shouldHave1,unique(kfrt$plantNum[kfrt$trt != "5"]))
# this should be 0 -- the ones from the fruit measurements that aren't in the daily datasheet
setdiff(unique(kfrt$plantNum[kfrt$trt != "Unbagged_2"]), shouldHave1)
## character(0)
# here's the list of plants that had their bags/tags go missing during the experiment (meaning that
# I was unable to collect fruits)
# note: "677_66_MASS__1" was the only plant that was missing a tag during the final collection
# that wasn't missing in one of my previous checks.
NAList <- c("1129_74_E__4", "1129_74_C__4", "39_60_A__3", "685_70_A__2", "677_66_MASS__1")
# note: this ignores trt#5 which is the same as #3
ZeroFruitPlants <- missingTrts[!(missingTrts %in% NAList)]
zfdf <- data.frame(Var1 = ZeroFruitPlants,
Freq = 0,
trt = sapply(X = 1:length(ZeroFruitPlants),
FUN = function(x) strsplit(as.character(ZeroFruitPlants[x]),
split = "__")[[1]][2]),
accessNum = sapply(X = 1:length(ZeroFruitPlants),
FUN = function(x) strsplit(as.character(ZeroFruitPlants[x]),
split = "__")[[1]][1])
)
zfdf$trt <- mapvalues(zfdf$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Unbagged",
"Unbagged & Outcrossed", "Unbagged_2"))
## The following `from` values were not present in `x`: 3, 4, 5
countFin <- rbind(counts, zfdf)
# final fruit counts for the fruits collected at the end of the experiment
countFin <- countFin[order(countFin$accessNum, countFin$trt, decreasing = FALSE), ]
# change labels from unbagged to conntrol
countFin$trt <- mapvalues(countFin$trt, c("Bagged", "Bagged & Selfed", "Unbagged",
"Unbagged & Outcrossed", "Unbagged_2")
, c("Bagged", "Bagged & Selfed", "Control",
"Unbagged & Outcrossed", "Control_2"))
# change levels, so that control is first
countFin$trt <- factor(countFin$trt, levels = c("Control","Control_2", "Bagged", "Bagged & Selfed",
"Unbagged & Outcrossed"))
plantInds <- 1:nrow(countFin) %in% grep(pattern = "1129", x = countFin$accessNum)
countFin$plantLineage <- sapply(1:length(plantInds), FUN = function(x){
ifelse(plantInds[x], "673_69", paste(strsplit(countFin$accessNum[x], split = "_")[[1]][1:2], collapse = "_") )})
ggplot(countFin, aes(x = trt , y = Freq, fill = trt)) +
geom_boxplot() +
# geom_violin()+
labs(y = "Number of fruits", x = "Treatment") +
scale_fill_brewer(name = "Treatment", palette = "Set1")
saveDir <- "/Users/callinswitzer/Dropbox/ExperSummer2016/Kalmia/Manuscript/Media/"
ggsave(paste0(saveDir, "KalmiaFruitNumber.pdf"), width = 10, height = 8)
ggplot(countFin[!(countFin$trt %in% 'Control_2'), ], aes(x = trt , y = Freq, fill = trt)) +
geom_boxplot() +
# geom_point(aes(color = plantLineage)) +
labs(y = "Number of fruits", x = "Treatment") +
theme(axis.text.x = element_text(angle = 35, hjust = 1),
legend.position = 'none') +
scale_fill_brewer(name = "Treatment", palette = "Set1")
ggsave(paste0(saveDir, "KalmiaFruitNumber_trt1_4Only.pdf"), width = 5, height = 4)
# I'm ignoring control#2, because it was originally intended for another experiment (analysis wasn't planned)
cf1 <- droplevels(countFin[countFin$trt != "Control_2",])
nrow(countFin) # number of total counts, including Control2
## [1] 104
sum(cf1$Freq) # total number of fruits in the analysis of count
## [1] 1035
# Number of counts, excluding control_2
# this is different from the number of photos
# because I didn't take photos on 0-count plants
nrow(cf1)
## [1] 84
data.frame(table(cf1$trt)) # number of plants in each treatment that we are analyzing
## Var1 Freq
## 1 Control 21
## 2 Bagged 22
## 3 Bagged & Selfed 21
## 4 Unbagged & Outcrossed 20
data.frame(table(cf1$accessNum)) # shows number of counts per plant -- 4 means that we counted all four treatments
## Var1 Freq
## 1 1129_74_A 4
## 2 1129_74_B 4
## 3 1129_74_C 3
## 4 1129_74_E 3
## 5 1129_74_F 4
## 6 12_2006_A 4
## 7 128_96_MASS 4
## 8 132_98_MASS 4
## 9 150_58_A 4
## 10 232_46_A 4
## 11 35_40_C 4
## 12 39_60_A 3
## 13 425_74_D 4
## 14 440_66_A 4
## 15 46_18_A 4
## 16 572_64_MASS 4
## 17 624_64_B 4
## 18 667_66_MASS 4
## 19 673_69_B 4
## 20 685_70_A 3
## 21 721_79_B 4
## 22 UNLABELED_1 4
data.frame(table(cf1$plantLineage))
## Var1 Freq
## 1 12_2006 4
## 2 128_96 4
## 3 132_98 4
## 4 150_58 4
## 5 232_46 4
## 6 35_40 4
## 7 39_60 3
## 8 425_74 4
## 9 440_66 4
## 10 46_18 4
## 11 572_64 4
## 12 624_64 4
## 13 667_66 4
## 14 673_69 22
## 15 685_70 3
## 16 721_79 4
## 17 UNLABELED_1 4
# shows which treatments / plants are missing from analysis
# this is different from the plants that just had 0 counts
data.frame(table(interaction(cf1$accessNum , cf1$trt)))
## Var1 Freq
## 1 1129_74_A.Control 1
## 2 1129_74_B.Control 1
## 3 1129_74_C.Control 1
## 4 1129_74_E.Control 1
## 5 1129_74_F.Control 1
## 6 12_2006_A.Control 1
## 7 128_96_MASS.Control 1
## 8 132_98_MASS.Control 1
## 9 150_58_A.Control 1
## 10 232_46_A.Control 1
## 11 35_40_C.Control 1
## 12 39_60_A.Control 0
## 13 425_74_D.Control 1
## 14 440_66_A.Control 1
## 15 46_18_A.Control 1
## 16 572_64_MASS.Control 1
## 17 624_64_B.Control 1
## 18 667_66_MASS.Control 1
## 19 673_69_B.Control 1
## 20 685_70_A.Control 1
## 21 721_79_B.Control 1
## 22 UNLABELED_1.Control 1
## 23 1129_74_A.Bagged 1
## 24 1129_74_B.Bagged 1
## 25 1129_74_C.Bagged 1
## 26 1129_74_E.Bagged 1
## 27 1129_74_F.Bagged 1
## 28 12_2006_A.Bagged 1
## 29 128_96_MASS.Bagged 1
## 30 132_98_MASS.Bagged 1
## 31 150_58_A.Bagged 1
## 32 232_46_A.Bagged 1
## 33 35_40_C.Bagged 1
## 34 39_60_A.Bagged 1
## 35 425_74_D.Bagged 1
## 36 440_66_A.Bagged 1
## 37 46_18_A.Bagged 1
## 38 572_64_MASS.Bagged 1
## 39 624_64_B.Bagged 1
## 40 667_66_MASS.Bagged 1
## 41 673_69_B.Bagged 1
## 42 685_70_A.Bagged 1
## 43 721_79_B.Bagged 1
## 44 UNLABELED_1.Bagged 1
## 45 1129_74_A.Bagged & Selfed 1
## 46 1129_74_B.Bagged & Selfed 1
## 47 1129_74_C.Bagged & Selfed 1
## 48 1129_74_E.Bagged & Selfed 1
## 49 1129_74_F.Bagged & Selfed 1
## 50 12_2006_A.Bagged & Selfed 1
## 51 128_96_MASS.Bagged & Selfed 1
## 52 132_98_MASS.Bagged & Selfed 1
## 53 150_58_A.Bagged & Selfed 1
## 54 232_46_A.Bagged & Selfed 1
## 55 35_40_C.Bagged & Selfed 1
## 56 39_60_A.Bagged & Selfed 1
## 57 425_74_D.Bagged & Selfed 1
## 58 440_66_A.Bagged & Selfed 1
## 59 46_18_A.Bagged & Selfed 1
## 60 572_64_MASS.Bagged & Selfed 1
## 61 624_64_B.Bagged & Selfed 1
## 62 667_66_MASS.Bagged & Selfed 1
## 63 673_69_B.Bagged & Selfed 1
## 64 685_70_A.Bagged & Selfed 0
## 65 721_79_B.Bagged & Selfed 1
## 66 UNLABELED_1.Bagged & Selfed 1
## 67 1129_74_A.Unbagged & Outcrossed 1
## 68 1129_74_B.Unbagged & Outcrossed 1
## 69 1129_74_C.Unbagged & Outcrossed 0
## 70 1129_74_E.Unbagged & Outcrossed 0
## 71 1129_74_F.Unbagged & Outcrossed 1
## 72 12_2006_A.Unbagged & Outcrossed 1
## 73 128_96_MASS.Unbagged & Outcrossed 1
## 74 132_98_MASS.Unbagged & Outcrossed 1
## 75 150_58_A.Unbagged & Outcrossed 1
## 76 232_46_A.Unbagged & Outcrossed 1
## 77 35_40_C.Unbagged & Outcrossed 1
## 78 39_60_A.Unbagged & Outcrossed 1
## 79 425_74_D.Unbagged & Outcrossed 1
## 80 440_66_A.Unbagged & Outcrossed 1
## 81 46_18_A.Unbagged & Outcrossed 1
## 82 572_64_MASS.Unbagged & Outcrossed 1
## 83 624_64_B.Unbagged & Outcrossed 1
## 84 667_66_MASS.Unbagged & Outcrossed 1
## 85 673_69_B.Unbagged & Outcrossed 1
## 86 685_70_A.Unbagged & Outcrossed 1
## 87 721_79_B.Unbagged & Outcrossed 1
## 88 UNLABELED_1.Unbagged & Outcrossed 1
Account for plant lineage and accession number
m1 <- glmer(Freq ~ trt + (1|plantLineage) + (1|accessNum), family = poisson, data = cf1)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
## Data: cf1
##
## AIC BIC logLik deviance df.resid
## 709.4 724.0 -348.7 697.4 78
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8555 -1.3244 -0.3544 1.1650 5.5802
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.2085 0.4566
## plantLineage (Intercept) 0.2278 0.4773
## Number of obs: 84, groups: accessNum, 22; plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.04258 0.17623 11.590 < 2e-16 ***
## trtBagged -0.97065 0.12274 -7.908 2.62e-15 ***
## trtBagged & Selfed 0.18796 0.08836 2.127 0.0334 *
## trtUnbagged & Outcrossed 0.74998 0.08352 8.980 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtBgg trtB&S
## trtBagged -0.200
## trtBggd&Slf -0.275 0.396
## trtUnbggd&O -0.312 0.421 0.583
# calculate LRT for trt
m2 <- update(m1, .~. - trt)
anova(m1, m2) # highly significant
## Data: cf1
## Models:
## m2: Freq ~ (1 | plantLineage) + (1 | accessNum)
## m1: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2 3 999.49 1006.79 -496.75 993.49
## m1 6 709.42 724.01 -348.71 697.42 296.07 3 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m1 <- glmer(Freq ~ trt + (1|plantLineage) + (1|accessNum), family = poisson, data = cf1)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
## Data: cf1
##
## AIC BIC logLik deviance df.resid
## 709.4 724.0 -348.7 697.4 78
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8555 -1.3244 -0.3544 1.1650 5.5802
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.2085 0.4566
## plantLineage (Intercept) 0.2278 0.4773
## Number of obs: 84, groups: accessNum, 22; plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.04258 0.17623 11.590 < 2e-16 ***
## trtBagged -0.97065 0.12274 -7.908 2.62e-15 ***
## trtBagged & Selfed 0.18796 0.08836 2.127 0.0334 *
## trtUnbagged & Outcrossed 0.74998 0.08352 8.980 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtBgg trtB&S
## trtBagged -0.200
## trtBggd&Slf -0.275 0.396
## trtUnbggd&O -0.312 0.421 0.583
# diagnostics
# qq plot
qqnorm(resid(m1), main = "")
qqline(resid(m1)) # outliers
# residual plot
plot(fitted(m1), resid(m1), xlab = "fitted", ylab = "residuals")
abline(0,0)
# possibly outliers
# QQPlot for group-level effects
qqnorm(ranef(m1)$accessNum[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(m1)$accessNum[[1]])
# # QQPlot for group-level effects
qqnorm(ranef(m1)$plantLineage[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(m1)$plantLineage[[1]])
infl <- influence(m1, obs = TRUE) # takes a while to calculate
# x11()
plot(infl, which = 'cook') # some influential points
# visualize model:
sjp.lmer(m1, type = 'fe')
sjp.lmer(m1, type = 're', sort = TRUE) # plot random effects to look for outliers
## Plotting random effects...
## Plotting random effects...
#check assumptions of model
overdisp_fun <- function(model) {
## number of variance parameters in
## an n-by-n variance-covariance matrix
vpars <- function(m) {
nrow(m)*(nrow(m)+1)/2
}
model.df <- sum(sapply(VarCorr(model),vpars))+length(fixef(model))
rdf <- nrow(model.frame(model))-model.df
rp <- residuals(model,type="pearson")
Pearson.chisq <- sum(rp^2)
prat <- Pearson.chisq/rdf
pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
}
overdisp_fun(m1) # shows overdispersion
## chisq ratio rdf p
## 2.727300e+02 3.496539e+00 7.800000e+01 2.078860e-23
# here's another way to check for overdispersion
residDev <- sum(residuals(m1, type = 'deviance')^2) # calculate residual deviance
# this ratio should be about 1 -- larger than 1 suggests overdispersion
residDev / df.residual(m1)
## [1] 4.072153
m1.1 <- glmer.nb(Freq ~ trt +(1|plantLineage) + (1|accessNum), data = cf1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00161649 (tol =
## 0.001, component 1)
summary(m1.1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Negative Binomial(2.1736) ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
## Data: cf1
##
## AIC BIC logLik deviance df.resid
## 571.9 588.9 -279.0 557.9 77
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3912 -0.5687 -0.1032 0.4856 2.6633
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.07951 0.2820
## plantLineage (Intercept) 0.33722 0.5807
## Number of obs: 84, groups: accessNum, 22; plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.06410 0.23661 8.724 < 2e-16 ***
## trtBagged -1.06039 0.25396 -4.175 2.97e-05 ***
## trtBagged & Selfed 0.04771 0.24127 0.198 0.843254
## trtUnbagged & Outcrossed 0.78092 0.23708 3.294 0.000988 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtBgg trtB&S
## trtBagged -0.465
## trtBggd&Slf -0.487 0.479
## trtUnbggd&O -0.533 0.457 0.469
# get overall p-value for treatment
m2.1 <- update(m1.1, .~. - trt)
anova(m1.1, m2.1, test = 'LRT') # LRT for trt for paper
## Data: cf1
## Models:
## m2.1: Freq ~ (1 | plantLineage) + (1 | accessNum)
## m1.1: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2.1 4 618.65 628.37 -305.32 610.65
## m1.1 7 571.92 588.93 -278.96 557.92 52.73 3 2.093e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
overdisp_fun(m1.1)
## chisq ratio rdf p
## 60.3298837 0.7734600 78.0000000 0.9310811
# here's another way to check for overdispersion
residDev <- sum(residuals(m1.1, type = 'deviance')^2) # calculate residual deviance
# this ratio should be about 1 -- larger than 1 suggests overdispersion
residDev / df.residual(m1.1)
## [1] 1.151896
# diagnostics
# qq plot
qqnorm(resid(m1.1), main = "")
qqline(resid(m1.1)) # a little better
# residual plot
plot(fitted(m1.1), resid(m1.1), xlab = "fitted", ylab = "residuals")
abline(0,0) # residuals look better for this model
# QQPlot for group-level effects
qqnorm(ranef(m1.1)$accessNum[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(m1.1)$accessNum[[1]])
qqnorm(ranef(m1.1)$plantLineage[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(m1.1)$plantLineage[[1]])
infl <- influence(m1.1, obs = TRUE) # takes a while to calculate
plot(infl, which = 'cook') # nothing above 1
# visualize model:
sjp.lmer(m1.1, type = 'fe')
sjp.lmer(m1.1, type = 're', sort = TRUE) # plot random effects to look for outliers
## Plotting random effects...
## Plotting random effects...
# get estimated dispersion parameter for NB Model
getME(m1.1, "glmer.nb.theta") # 2.17
## [1] 2.173551
# post-hoc pairwise comparisons with adjusted p-values
# from documentation:
# test = adjusted() multiple test procedures as specified by the type argument
# to adjusted: "single-step" denotes adjusted p values as computed
# from the joint normal or t distribution of the z statistics (default),
summary(glht(m1.1, lsm(pairwise ~ trt)), test=adjusted("single-step")) # pairwise tests for fruit counts
##
## Simultaneous Tests for General Linear Hypotheses
##
## Fit: glmer(formula = Freq ~ trt + (1 | plantLineage) + (1 | accessNum),
## data = cf1, family = negative.binomial(theta = 2.17355065418305))
##
## Linear Hypotheses:
## Estimate Std. Error z value
## Control - Bagged == 0 1.06039 0.25396 4.175
## Control - Bagged & Selfed == 0 -0.04771 0.24127 -0.198
## Control - Unbagged & Outcrossed == 0 -0.78092 0.23708 -3.294
## Bagged - Bagged & Selfed == 0 -1.10810 0.25307 -4.379
## Bagged - Unbagged & Outcrossed == 0 -1.84131 0.25634 -7.183
## Bagged & Selfed - Unbagged & Outcrossed == 0 -0.73321 0.24645 -2.975
## Pr(>|z|)
## Control - Bagged == 0 < 0.001 ***
## Control - Bagged & Selfed == 0 0.99726
## Control - Unbagged & Outcrossed == 0 0.00526 **
## Bagged - Bagged & Selfed == 0 < 0.001 ***
## Bagged - Unbagged & Outcrossed == 0 < 0.001 ***
## Bagged & Selfed - Unbagged & Outcrossed == 0 0.01566 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
summary(glht(m1.1, lsm(pairwise ~ trt)), test=adjusted("none")) # pairwise tests for fruit counts, unadjusted
##
## Simultaneous Tests for General Linear Hypotheses
##
## Fit: glmer(formula = Freq ~ trt + (1 | plantLineage) + (1 | accessNum),
## data = cf1, family = negative.binomial(theta = 2.17355065418305))
##
## Linear Hypotheses:
## Estimate Std. Error z value
## Control - Bagged == 0 1.06039 0.25396 4.175
## Control - Bagged & Selfed == 0 -0.04771 0.24127 -0.198
## Control - Unbagged & Outcrossed == 0 -0.78092 0.23708 -3.294
## Bagged - Bagged & Selfed == 0 -1.10810 0.25307 -4.379
## Bagged - Unbagged & Outcrossed == 0 -1.84131 0.25634 -7.183
## Bagged & Selfed - Unbagged & Outcrossed == 0 -0.73321 0.24645 -2.975
## Pr(>|z|)
## Control - Bagged == 0 2.97e-05 ***
## Control - Bagged & Selfed == 0 0.843254
## Control - Unbagged & Outcrossed == 0 0.000988 ***
## Bagged - Bagged & Selfed == 0 1.19e-05 ***
## Bagged - Unbagged & Outcrossed == 0 6.81e-13 ***
## Bagged & Selfed - Unbagged & Outcrossed == 0 0.002929 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- none method)
count_pub <- within(countFin, {
trt <- mapvalues(trt, from = c("Control","Control_2", "Bagged", "Bagged & Selfed",
"Unbagged & Outcrossed"),
to = c("Control","Control_2", "Bagged", "Bagged\n & Selfed",
"Outcrossed"))
})
count_pub <- droplevels(count_pub)
ggplot(count_pub[!(count_pub$trt %in% 'Control_2'), ], aes(x = trt , y = Freq+ 0.1)) +
geom_boxplot(width = 0.2, fill = 'grey80') +
labs(y = "Number of fruits", x = "Treatment") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = 'none') +
# annotate
annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) + 10, label=c("a", "b", "a", "c"),
color="black") +
scale_y_log10(breaks = c(1, 10, 100), limits = c(0.1, 100))
ggsave(paste0(saveDir, "KalmiaFruitNumber_logScale.pdf"), width = 3, height = 4)
# refit m1.1 with treatments in alphabetical order (b/c order isn't preserved in model matrix)
cf1.1 <- within(cf1, {
trt = as.factor(as.character(cf1$trt))
})
# refit model with different reference levels
m1.2 <- glmer.nb(Freq ~ trt + (1|plantLineage) + (1|accessNum), data = cf1.1)
summary(m1.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Negative Binomial(2.1735) ( log )
## Formula: Freq ~ trt + (1 | plantLineage) + (1 | accessNum)
## Data: cf1.1
##
## AIC BIC logLik deviance df.resid
## 571.9 588.9 -279.0 557.9 77
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3912 -0.5687 -0.1032 0.4856 2.6633
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.07951 0.2820
## plantLineage (Intercept) 0.33723 0.5807
## Number of obs: 84, groups: accessNum, 22; plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0037 0.2542 3.949 7.84e-05 ***
## trtBagged & Selfed 1.1081 0.2531 4.379 1.19e-05 ***
## trtControl 1.0604 0.2540 4.175 2.98e-05 ***
## trtUnbagged & Outcrossed 1.8413 0.2563 7.183 6.81e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtB&S trtCnt
## trtBggd&Slf -0.544
## trtControl -0.566 0.547
## trtUnbggd&O -0.598 0.532 0.568
# may help solve convergence issues-- NOPE!
pframe <- data.frame(trt = levels(droplevels(cf1.1$trt)), plantLineage = 99999)
pframe$Freq <- 0
pp <- predict(m1.2, newdata = pframe, re.form=NA, type = 'response') # re.form sets all random effects to 0
### Calculate CI's (using bootstrap, not accounting for random effects)
## I think that sometimes the model doesn't converge, because it estimates variance
## for random effects to be 0.
## according to this: http://rstudio-pubs-static.s3.amazonaws.com/24365_2803ab8299934e888a60e7b16113f619.html
## you can safely ignore the warnings about convergence
bb2 <- bootMer(m1.2, FUN=function(x) predict(x, pframe, re.form=NA, type = 'response'), nsim = 1000)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00141759 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0014637 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00480463 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0209329 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00568728 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00129131 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00179865 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0292954 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00106755 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00323747 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00339199 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00614815 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00563433 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00567981 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00738054 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00584003 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00210783 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00721664 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00162786 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00125417 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00189829 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00339774 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0131611 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00109138 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00648445 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00132784 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00100767 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.014478 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00113273 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00257754 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0016834 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00136239 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0010853 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00293564 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00146865 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00699374 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00295144 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00279048 (tol =
## 0.001, component 1)
bb2_se <-apply(bb2$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
pframe$blo<-bb2_se[1,]
pframe$bhi<-bb2_se[2,]
pframe$predMean <- pp
# This method is the same, but less straightforward.
# mm <- model.matrix(terms(m1.2), pframe)
# predFun<-function(.) exp(mm%*%fixef(.) )
# bb<-bootMer(m1.2,FUN=predFun,nsim=200) #do this 200 times
#
# # as we did this 200 times the 95% CI will be bordered by the 5th and 195th value
# bb_se<-apply(bb$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
# pframe$blo<-bb_se[1,]
# pframe$bhi<-bb_se[2,]
# pframe$predMean <- pp
pframe$median = tapply(cf1.1$Freq, INDEX = cf1.1$trt, median)
pframe$trt <- mapvalues(pframe$trt, from = c("Control", "Bagged", "Bagged & Selfed",
"Unbagged & Outcrossed"),
to = c("Control", "Autogamous-\nselfed", "Manipulated-\nselfed",
"Supplemental-\noutcrossed"))
pframe$trt <- relevel(pframe$trt, ref = "Control")
#plot 95% confidence intervals
# "Mean and bootstrap CI based on fixed-effects uncertainty ONLY"
number_ticks <- function(n) {function(limits) pretty(limits, n)}
g0 <- ggplot(pframe, aes(x=trt, y=predMean))+
geom_point()+
labs(y = "Number of fruits", x = "Treatment") +
geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
scale_y_log10(limits =c(1,60), breaks = number_ticks(6)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = 'none') +
# annotate
annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) - 10, label=c("a", "b", "a", "c"),
color="black")
g0
ggsave(paste0(saveDir, "KalmiaFruitNumber_BSCI_logScale.pdf"), width = 3, height = 4)
## Bootstrap CI on linear scale -- not that great!
g1 <- ggplot(pframe, aes(x=trt, y=predMean))+
geom_point()+
labs(y = "Number of Fruits", x = "Treatment") +
geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = 'none') +
# annotate
annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) - 40, label=c("a", "b", "a", "c"),
color="black")
g1
ggsave(paste0(saveDir, "KalmiaFruitNumber_BSCI_LinearScale.pdf"), width = 3, height = 4)
cp <- droplevels(count_pub[count_pub$trt != 'Control_2',])
# this might actually be the best way
ggplot(cp, aes(x = trt , y = Freq)) +
geom_boxplot(width = 0.2, fill = 'grey80') +
labs(y = "Number of fruits", x = "Treatment") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = 'none') +
# annotate
annotate(geom="text", x=c(1,2,3,4), y=c(70, 70, 70, 70) + 1, label=c("a", "b", "a", "c"),
color="black")
ggsave(paste0(saveDir, "KalmiaFruitNumber_LinearScale.pdf"), width = 3, height = 4)
# calculate fruit size by plant
sizeDF_mean <- as.data.frame(tapply(kfrt$dia_mm, INDEX = kfrt$plantNum, mean))
colnames(sizeDF_mean) = "meanFrtSz"
sizeDF_mean$trt = sapply(X = 1:length(sizeDF_mean$meanFrtSz),
FUN = function(x) strsplit(as.character(row.names(sizeDF_mean)[x]),
split = "__")[[1]][2])
sizeDF_mean$trt <- mapvalues(sizeDF_mean$trt, c(1,2,3,4,5), c("Bagged", "Bagged & Selfed", "Control",
"Unbagged & Outcrossed", "Control_2"))
# reorder
sizeDF_mean$trt <- factor(sizeDF_mean$trt, levels = c("Control", "Control_2", "Bagged", "Bagged & Selfed",
"Unbagged & Outcrossed"))
sizeDF_mean$accessNum = sapply(X = 1:length(sizeDF_mean$meanFrtSz),
FUN = function(x) strsplit(as.character(row.names(sizeDF_mean)[x]),
split = "__")[[1]][1])
ggplot(sizeDF_mean, aes(x = trt, y = meanFrtSz, fill = trt)) +
geom_boxplot(alpha = 0.5) +
# stat_summary(fun.y=mean, geom="line", aes(group = accessNum, color = accessNum)) +
# stat_summary(fun.y=mean, geom="point", aes(group = accessNum, color = accessNum)) +
geom_point(aes(color = trt))
ggplot(sizeDF_mean, aes(x = trt, y = meanFrtSz, fill = trt)) +
geom_boxplot() +
labs(y = "Mean Fruit Diameter (mm)", x = "Treatment") +
scale_fill_brewer(name = "Treatment", palette = "Set1")
ggsave(paste0(saveDir, "KalmiaFruitDiameter.pdf"), width = 10, height = 8)
# visualize , but exclude treatment 5
ggplot(sizeDF_mean[!(sizeDF_mean$trt %in% 'Control_2'), ],
aes(x = trt, y = meanFrtSz, fill = trt)) +
geom_boxplot() +
labs(y = "Mean Fruit Diameter (mm)", x = "Treatment") +
scale_fill_brewer(name = "Treatment", palette = "Set1") +
theme(axis.text.x = element_text(angle = 35, hjust = 1),
legend.position = 'none')
ggsave(paste0(saveDir, "KalmiaFruitDiameter_trt14Only.pdf"), width = 5, height = 4)
size1 <- within(kfrt, {
trt <- mapvalues(trt, from = c("Bagged", "Bagged & Selfed", "Unbagged",
"Unbagged & Outcrossed", "Unbagged_2"),
to = c("Autogamous-\nselfed", "Manipulated-\nselfed", "Control",
"Supplemental-\noutcrossed", "Control_2"))
})
size1 <- droplevels(size1[size1$trt != "Control_2",])
# get sample sizes for size dataset
nrow(size1) # total number of fruits in analysis for size
## [1] 1035
data.frame(table(size1$accessNum)) # total number of fruits per plant
## Var1 Freq
## 1 1129_74_A 93
## 2 1129_74_B 36
## 3 1129_74_C 69
## 4 1129_74_E 43
## 5 1129_74_F 48
## 6 12_2006_A 22
## 7 128_96_MASS 64
## 8 132_98_MASS 17
## 9 150_58_A 39
## 10 232_46_A 13
## 11 35_40_C 70
## 12 39_60_A 37
## 13 425_74_D 94
## 14 440_66_A 53
## 15 46_18_A 19
## 16 572_64_MASS 31
## 17 624_64_B 43
## 18 667_66_MASS 69
## 19 673_69_B 124
## 20 685_70_A 23
## 21 721_79_B 10
## 22 UNLABELED_1 18
data.frame(table(size1$plantLineage)) # total number of fruits per plant lineage
## Var1 Freq
## 1 12_2006 22
## 2 128_96 64
## 3 132_98 17
## 4 150_58 39
## 5 232_46 13
## 6 35_40 70
## 7 39_60 37
## 8 425_74 94
## 9 440_66 53
## 10 46_18 19
## 11 572_64 31
## 12 624_64 43
## 13 667_66 69
## 14 673_69 413
## 15 685_70 23
## 16 721_79 10
## 17 UNLABELED_1 18
size1$trt <- relevel(as.factor(size1$trt), ref = "Control")
f1 <- lmer(dia_mm ~ trt + (1|plantLineage/accessNum), data = size1)
summary(f1) # final model for paper
## Linear mixed model fit by REML ['lmerMod']
## Formula: dia_mm ~ trt + (1 | plantLineage/accessNum)
## Data: size1
##
## REML criterion at convergence: 1852.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8939 -0.6307 0.0377 0.6695 3.4682
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum:plantLineage (Intercept) 0.04439 0.2107
## plantLineage (Intercept) 0.13357 0.3655
## Residual 0.32638 0.5713
## Number of obs: 1035, groups: accessNum:plantLineage, 22; plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.23422 0.11027 38.40
## trtAutogamous-\nselfed -0.51953 0.07274 -7.14
## trtManipulated-\nselfed 0.01439 0.05304 0.27
## trtSupplemental-\noutcrossed 0.73741 0.04881 15.11
##
## Correlation of Fixed Effects:
## (Intr) trtA-s trtM-s
## trtAtgms-sl -0.179
## trtMnpltd-s -0.235 0.387
## trtSpplmn-o -0.296 0.404 0.537
f1 <- lmer(dia_mm ~ trt + (1|plantLineage) + (1|accessNum), data = size1)
summary(f1) # final model for paper (same as above)
## Linear mixed model fit by REML ['lmerMod']
## Formula: dia_mm ~ trt + (1 | plantLineage) + (1 | accessNum)
## Data: size1
##
## REML criterion at convergence: 1852.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8939 -0.6307 0.0377 0.6695 3.4682
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.04439 0.2107
## plantLineage (Intercept) 0.13357 0.3655
## Residual 0.32638 0.5713
## Number of obs: 1035, groups: accessNum, 22; plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.23422 0.11027 38.40
## trtAutogamous-\nselfed -0.51953 0.07274 -7.14
## trtManipulated-\nselfed 0.01439 0.05304 0.27
## trtSupplemental-\noutcrossed 0.73741 0.04881 15.11
##
## Correlation of Fixed Effects:
## (Intr) trtA-s trtM-s
## trtAtgms-sl -0.179
## trtMnpltd-s -0.235 0.387
## trtSpplmn-o -0.296 0.404 0.537
# get p-value for trt
f2 <- update(f1, .~. - trt)
anova(f1, f2, "LRT")
## refitting model(s) with ML (instead of REML)
## Data: size1
## Models:
## f2: dia_mm ~ (1 | plantLineage) + (1 | accessNum)
## f1: dia_mm ~ trt + (1 | plantLineage) + (1 | accessNum)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## f2 4 2234.7 2254.5 -1113.36 2226.7
## f1 7 1851.4 1886.0 -918.71 1837.4 389.3 3 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# diagnostics
# qq plot
qqnorm(resid(f1), main = "")
qqline(resid(f1)) # good
# residual plot
plot(fitted(f1), residuals(f1, type = "deviance"), xlab = "fitted", ylab = "residuals")
abline(0,0)
# QQPlot for group-level effects
qqnorm(ranef(f1)$accessNum[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(f1)$accessNum[[1]]) # one possible outlier
# QQPlot for group-level effects
qqnorm(ranef(f1)$plantLineage[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(f1)$plantLineage[[1]]) # looks good
infl <- influence(f1, obs = TRUE) # takes a while to calculate
plot(infl, which = 'cook') # nothing above 1
# visualize model:
sjp.lmer(f1, type = 'fe')
## Computing p-values via Kenward-Roger approximation. Use `p.kr = FALSE` if computation takes too long.
# Best Linear Unbiased Predictors (BLUPs)
sjp.lmer(f1, type = 're', sort = TRUE) # plot random effects to look for outliers
## Plotting random effects...
## Plotting random effects...
# post-hoc pairwise comparisons with adjusted p-values
# from documentation:
# test = adjusted() multiple test procedures as specified by the type argument
# to adjusted: "single-step" denotes adjusted p values as computed
# from the joint normal or t distribution of the z statistics (default),
summary(glht(f1, lsm(pairwise ~ trt)), test=adjusted("single-step")) # pairwise tests for fruit counts
## Loading required namespace: lmerTest
## Note: df set to 1020
##
## Simultaneous Tests for General Linear Hypotheses
##
## Fit: lmer(formula = dia_mm ~ trt + (1 | plantLineage) + (1 | accessNum),
## data = size1)
##
## Linear Hypotheses:
## Estimate Std. Error
## Control - Autogamous-\nselfed == 0 0.51953 0.07274
## Control - Manipulated-\nselfed == 0 -0.01439 0.05304
## Control - Supplemental-\noutcrossed == 0 -0.73741 0.04881
## Autogamous-\nselfed - Manipulated-\nselfed == 0 -0.53392 0.07156
## Autogamous-\nselfed - Supplemental-\noutcrossed == 0 -1.25694 0.06930
## Manipulated-\nselfed - Supplemental-\noutcrossed == 0 -0.72301 0.04916
## t value Pr(>|t|)
## Control - Autogamous-\nselfed == 0 7.142 <1e-06 ***
## Control - Manipulated-\nselfed == 0 -0.271 0.993
## Control - Supplemental-\noutcrossed == 0 -15.107 <1e-06 ***
## Autogamous-\nselfed - Manipulated-\nselfed == 0 -7.461 <1e-06 ***
## Autogamous-\nselfed - Supplemental-\noutcrossed == 0 -18.138 <1e-06 ***
## Manipulated-\nselfed - Supplemental-\noutcrossed == 0 -14.709 <1e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
summary(glht(f1, lsm(pairwise ~ trt)), test=adjusted("none")) # pairwise tests for fruit counts, unadjusted
## Note: df set to 1020
##
## Simultaneous Tests for General Linear Hypotheses
##
## Fit: lmer(formula = dia_mm ~ trt + (1 | plantLineage) + (1 | accessNum),
## data = size1)
##
## Linear Hypotheses:
## Estimate Std. Error
## Control - Autogamous-\nselfed == 0 0.51953 0.07274
## Control - Manipulated-\nselfed == 0 -0.01439 0.05304
## Control - Supplemental-\noutcrossed == 0 -0.73741 0.04881
## Autogamous-\nselfed - Manipulated-\nselfed == 0 -0.53392 0.07156
## Autogamous-\nselfed - Supplemental-\noutcrossed == 0 -1.25694 0.06930
## Manipulated-\nselfed - Supplemental-\noutcrossed == 0 -0.72301 0.04916
## t value Pr(>|t|)
## Control - Autogamous-\nselfed == 0 7.142 1.74e-12 ***
## Control - Manipulated-\nselfed == 0 -0.271 0.786
## Control - Supplemental-\noutcrossed == 0 -15.107 < 2e-16 ***
## Autogamous-\nselfed - Manipulated-\nselfed == 0 -7.461 1.84e-13 ***
## Autogamous-\nselfed - Supplemental-\noutcrossed == 0 -18.138 < 2e-16 ***
## Manipulated-\nselfed - Supplemental-\noutcrossed == 0 -14.709 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- none method)
# refit m1.1 with treatments in alphabetical order (b/c order isn't preserved in model matrix)
sf1<- within(size1, {
trt = as.factor(as.character(size1$trt))
})
# refit model with different reference levels
f1 <- lmer(dia_mm ~ trt + (1|plantLineage) + (1|accessNum), data = sf1)
summary(f1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: dia_mm ~ trt + (1 | plantLineage) + (1 | accessNum)
## Data: sf1
##
## REML criterion at convergence: 1852.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8939 -0.6307 0.0377 0.6695 3.4682
##
## Random effects:
## Groups Name Variance Std.Dev.
## accessNum (Intercept) 0.04439 0.2107
## plantLineage (Intercept) 0.13357 0.3655
## Residual 0.32638 0.5713
## Number of obs: 1035, groups: accessNum, 22; plantLineage, 17
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.71469 0.12074 30.766
## trtControl 0.51953 0.07274 7.142
## trtManipulated-\nselfed 0.53392 0.07156 7.461
## trtSupplemental-\noutcrossed 1.25694 0.06930 18.138
##
## Correlation of Fixed Effects:
## (Intr) trtCnt trtM-s
## trtControl -0.439
## trtMnpltd-s -0.433 0.730
## trtSpplmn-o -0.480 0.765 0.757
pframe <- data.frame(trt = levels(droplevels(sf1$trt)))
pframe$dia_mm = 0
pp <- predict(f1, newdata = pframe, re.form=NA, type = 'response') # re.form sets all random effects to 0
### Calculate CI's (using bootstrap, not accounting for random effects)
mm <- model.matrix(terms(f1), pframe)
predFun<-function(.) mm%*%fixef(.)
bb<-bootMer(f1,FUN=predFun,nsim = 1000) #do this 1000 times
# get quantiles from bootstrap sample
bb_se<-apply(bb$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
pframe$blo<-bb_se[1,]
pframe$bhi<-bb_se[2,]
pframe$predMean <- pp
pframe # print frame, in case reporting in a table
## trt dia_mm blo bhi predMean
## 1 Autogamous-\nselfed 0 3.460322 3.964997 3.714693
## 2 Control 0 4.006854 4.452859 4.234223
## 3 Manipulated-\nselfed 0 4.015142 4.469363 4.248615
## 4 Supplemental-\noutcrossed 0 4.743172 5.176030 4.971629
pframe$trt <- relevel(pframe$trt, ref = "Control")
# refref here, labels are wrong!
#plot 95% confidence intervals
# "Mean and bootstrap CI based on fixed-effects uncertainty ONLY"
g2 <- ggplot(pframe, aes(x=trt, y=predMean))+
geom_point()+
labs(y = "Fruit dia. (mm)", x = "Treatment") +
geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = 'none') +
# annotate
annotate(geom="text", x=c(1,2,3,4), y=c(5,5,5,5) + 0.7, label=c("a", "b", "a", "c"),
color="black")
g2
ggsave(paste0(saveDir, "KalmiaFruitDia_BSCI.pdf"), width = 3, height = 4)
## visualize raw data (mean fruit size per plant)
sdf1 <- droplevels(within(sizeDF_mean[!(sizeDF_mean$trt %in% 'Control_2'), ], {
trt <- mapvalues(trt, from = c("Control", "Bagged", "Bagged & Selfed",
"Unbagged & Outcrossed"),
to = c("Control", "Autogamous-\nselfed", "Manipulated-\nselfed",
"Supplemental-\noutcrossed"))
}))
# visualize fruit size
ggplot(sdf1, aes(x = trt, y = meanFrtSz)) +
geom_boxplot(width = 0.2, fill = 'grey80') +
labs(y = "Fruit dia. (mm)", x = "Treatment") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = 'none') +
# annotate
annotate(geom="text", x=c(1,2,3,4), y=c(5,5,5,5) + 2, label=c("a", "b", "a", "c"),
color="black")
ggsave(paste0(saveDir, "KalmiaFruitDia.pdf"), width = 3, height = 4)
# compare two plots
g2 + geom_boxplot(data = sdf1, aes(x = trt, y = meanFrtSz), width = 0.2, alpha = 0)
# read in data
sizeSeed <- read.csv("KalmiaFruitSizeAndSeeds.csv")
plantInds <- 1:nrow(sizeSeed) %in% grep(pattern = "1129", x = sizeSeed$Plant)
sizeSeed$plantLineage <- sapply(1:nrow(sizeSeed), FUN = function(x){
ifelse(plantInds[x], "673_69", paste(strsplit(as.character(sizeSeed$Plant[x]), split = "_")[[1]][1:2], collapse = "_") )})
ggplot(sizeSeed, aes(x = Dia..mm., y = NumSeeds)) +
geom_point() +
stat_smooth(method = 'lm', formula = y ~ exp(x), se = F) +
labs(x = 'Fruit Diameter (mm)', y = 'Num Seeds in 1 Carpel')
ggsave(paste0(saveDir, "KalmiaFruitSeeds.pdf"), width = 5, height = 4)
# on log scale
ggplot(sizeSeed, aes(x = Dia..mm., y = NumSeeds)) +
geom_point() +
stat_smooth(method = 'lm', formula = y ~ x, se = F) +
scale_y_continuous(trans="log") +
labs(y = "log(e) number of seeds")
# GLMER
ss1 <- glmer(NumSeeds ~ Dia..mm. + (1|plantLineage), family = poisson, data = sizeSeed)
summary(ss1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: NumSeeds ~ Dia..mm. + (1 | plantLineage)
## Data: sizeSeed
##
## AIC BIC logLik deviance df.resid
## 139.2 142.0 -66.6 133.2 16
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.32556 -0.41555 0.01926 0.42617 1.28347
##
## Random effects:
## Groups Name Variance Std.Dev.
## plantLineage (Intercept) 0.1422 0.3771
## Number of obs: 19, groups: plantLineage, 15
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.02551 0.57541 0.044 0.965
## Dia..mm. 0.66422 0.12957 5.126 2.96e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Dia..mm. -0.979
ss2 <- update(ss1, .~. - Dia..mm.)
anova(ss1, ss2, test = "LRT") # LRT for paper
## Data: sizeSeed
## Models:
## ss2: NumSeeds ~ (1 | plantLineage)
## ss1: NumSeeds ~ Dia..mm. + (1 | plantLineage)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ss2 2 158.21 160.09 -77.103 154.21
## ss1 3 139.16 141.99 -66.580 133.16 21.047 1 4.482e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
residDev <- sum(residuals(ss1, type = 'deviance')^2) # calculate residual deviance
# this ratio should be about 1 -- larger than 1 suggests overdispersion
residDev / df.residual(ss1)
## [1] 0.8913075
overdisp_fun(ss1) #not overdispersed
## chisq ratio rdf p
## 12.7841497 0.7990094 16.0000000 0.6884697
## model diagnostics
plot(ss1) #cook's distance
qqnorm(resid(ss1), main = "")
qqline(resid(ss1)) # one outlier
# # QQPlot for group-level effects
qqnorm(ranef(ss1)$plantLineage[[1]], main="Normal Q-Q plot for random effects")
qqline(ranef(ss1)$plantLineage[[1]])
infl <- influence(ss1, obs = TRUE) # takes a while to calculate
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00117623 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00117749 (tol =
## 0.001, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00105581 (tol =
## 0.001, component 1)
plot(infl, which = 'cook') # some influential points
# get sample size
nrow(sizeSeed) # total number of individual fruits
## [1] 19
sum(sizeSeed$NumSeeds) # total number of seeds counted
## [1] 382
# total plant lineages
length(unique(sizeSeed$plantLineage))
## [1] 15
# plot data with line
preds <- predict(ss1, newdata = data.frame(Dia..mm. = seq(2.5, 5.5, length.out = 100)), re.form = NA, type = 'response')
predDF <- data.frame(Dia..mm. = seq(2.5, 5.5, length.out = 100), preds = preds)
# visualize data
ggplot(sizeSeed, aes(x = Dia..mm., y = NumSeeds)) +
geom_point() +
geom_line(data = predDF, aes(x = Dia..mm., y = preds), color = 'blue') +
labs(x = 'Fruit Diameter (mm)', y = 'Num Seeds in 1 Carpel')
ggsave(paste0(saveDir, "KalmiaFruitDia_NumSeeds.pdf"), width = 3, height = 4)
sessionInfo()
## R version 3.3.2 (2016-10-31)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X El Capitan 10.11.6
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] lsmeans_2.25 estimability_1.2 multcomp_1.4-6
## [4] TH.data_1.0-8 MASS_7.3-45 survival_2.40-1
## [7] mvtnorm_1.0-5 sjPlot_2.2.1 influence.ME_0.9-8
## [10] plyr_1.8.4 lme4_1.1-12 Matrix_1.2-8
## [13] ggplot2_2.2.1
##
## loaded via a namespace (and not attached):
## [1] tidyr_0.6.1 splines_3.3.2 merTools_0.3.0
## [4] modelr_0.1.0 Formula_1.2-1 shiny_1.0.0
## [7] assertthat_0.1 stats4_3.3.2 latticeExtra_0.6-28
## [10] coin_1.1-3 yaml_2.1.14 backports_1.0.5
## [13] lattice_0.20-34 digest_0.6.12 checkmate_1.8.2
## [16] RColorBrewer_1.1-2 minqa_1.2.4 colorspace_1.3-2
## [19] sandwich_2.3-4 htmltools_0.3.5 httpuv_1.3.3
## [22] psych_1.6.12 broom_0.4.1 haven_1.0.0
## [25] purrr_0.2.2 xtable_1.8-2 scales_0.4.1
## [28] stringdist_0.9.4.4 htmlTable_1.9 arm_1.9-3
## [31] tibble_1.2 effects_3.1-2 DT_0.2
## [34] nnet_7.3-12 lazyeval_0.2.0 pbkrtest_0.4-6
## [37] mnormt_1.5-5 magrittr_1.5 mime_0.5
## [40] evaluate_0.10 nlme_3.1-130 foreign_0.8-67
## [43] data.table_1.10.4 tools_3.3.2 stringr_1.1.0
## [46] munsell_0.4.3 cluster_2.0.5 blme_1.0-4
## [49] grid_3.3.2 nloptr_1.0.4 htmlwidgets_0.8
## [52] base64enc_0.1-3 labeling_0.3 rmarkdown_1.3
## [55] gtable_0.2.0 codetools_0.2-15 lmerTest_2.0-33
## [58] abind_1.4-5 sjstats_0.7.1 DBI_0.5-1
## [61] reshape2_1.4.2 sjmisc_2.2.1 R6_2.2.0
## [64] gridExtra_2.2.1 zoo_1.7-14 knitr_1.15.1
## [67] dplyr_0.5.0 Hmisc_4.0-2 rprojroot_1.2
## [70] modeltools_0.2-21 stringi_1.1.2 parallel_3.3.2
## [73] Rcpp_0.12.9 rpart_4.1-10 acepack_1.4.1
## [76] coda_0.19-1 lmtest_0.9-34
# print time of that html document was created
Sys.time()
## [1] "2017-04-24 09:35:53 EDT"